Explore temperature data

Author

Max Lindmark, Jan Ohlberger, Anna Gårdmark

Published

January 15, 2024

Load libraries

pkgs <- c("here", "tidyverse", "readxl", "RColorBrewer", "ISOweek", "viridis", "ncdf4", "reshape2") 

library(here)
#> here() starts at /Users/maxlindmark/Dropbox/Max work/R/perch-growth
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr     1.1.2     ✔ readr     2.1.4
#> ✔ forcats   1.0.0     ✔ stringr   1.5.0
#> ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
#> ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
#> ✔ purrr     1.0.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(RColorBrewer)
library(viridis)
#> Loading required package: viridisLite
library(ISOweek)
library(ncdf4)
library(reshape2)
#> 
#> Attaching package: 'reshape2'
#> 
#> The following object is masked from 'package:tidyr':
#> 
#>     smiths

# devtools::install_github("seananderson/ggsidekick") # not on CRAN 
library(ggsidekick); theme_set(theme_sleek())

# Load functions
home <- here::here()
fxn <- list.files(paste0(home, "/R/functions"))
invisible(sapply(FUN = source, paste0(home, "/R/functions/", fxn)))

Temperature logger data

home <- here::here()
filenames <- list.files(paste0(home,"/data/temp-data"))
raw_data <- NULL

for(i in 1:length(filenames)) {
  dat <- read_excel(paste0(home, "/data/temp-data/", filenames[i]))
  if(i == 1) names <- colnames(dat)
  if(i != 1) names(dat) <- names
  area <- substr(filenames[i], 1, 2)
  if(area == "SI") area <- substr(filenames[i], 1, 5)
  if(area == "JM") area <- paste0(substr(filenames[i], 1, 2), substr(filenames[i], 9, 10))
  if(area == "JMT0") area <- substr(filenames[i], 1, 2)
  dat$area <- area
  raw_data <- data.frame(rbind(raw_data, dat))
} # JMT0 is really JM_T10 and taken as the main temperature time series for JM
#> New names:
#> • `` -> `...3`

# Filter only one JM series: Which one?
raw_data <- raw_data %>% filter(!area %in% c("JMT2", "JMT3"))

# Depth measurements
log_dat <- raw_data %>%
  mutate(Depth = gsub(",", ".", gsub('m', '', Depth))) %>%
  filter(Depth != "Yta") %>%
  filter(Depth != "Siktdjup") %>%
  mutate_at('Depth', ~str_replace(., "0.3-0.5", "0.5")) %>%
  mutate_at('Depth', ~str_replace(., "ca 1.5", "1.5")) %>%
  mutate(Depth = as.numeric(Depth)) %>%
  drop_na(Depth)

# Filter other columns
log_dat <- log_dat %>%
  select(area, Station_Code, Year, Month, Day, Depth, Mean) %>%
  filter(Depth >= 0.5 & Depth <= 1.5) %>% # restrict depth range
  filter(Year %in% seq(1963, 2019, 1)) %>% # complete years only
  filter(!is.na(Station_Code)) %>%
  filter(Month %in% seq(12)) %>%
  filter(Day %in% seq(31)) %>%
  mutate(Mean = as.numeric(Mean)) %>%
  select(-Station_Code) %>%
  mutate(date = as.Date(paste(Year, Month, Day, sep = "-")))

# Calculate some date variables needed when joining data sets and fitting models
log_dat <- log_dat %>% 
  mutate(yday = yday(date),
         month = month(date)) %>% 
  rename(year = Year,
         temp = Mean) %>% 
  dplyr::select(year, area, temp, yday, month, date) %>% 
  mutate(source = "logger",
         date = as.character(date))

# Plot data over year
log_dat %>%
  ggplot(., aes(x = year, y = temp, color = yday)) +
  geom_point(size = 0.25) +
  scale_color_viridis() +
  theme(plot.title = element_text(size = 15, face = "bold")) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  labs(x = "Year", y = "Temperature") +
  facet_wrap(~area) +
  NULL


# Plot data over yearday
log_dat %>%
  ggplot(.,aes(x = yday, y = temp, color = factor(year))) +
  geom_point(size = 0.25) +
  scale_color_viridis(discrete = TRUE) +
  theme(plot.title = element_text(size = 15, face = "bold")) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  theme(axis.text.x = element_text(angle = 90)) +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  labs(x = "Day-of-the-year", y = "Temperature") +
  # facet_grid(cols=vars(area)) +
  facet_wrap(~area) +
  NULL

Temperature at fishing

kul_dat <- read_excel(paste0(home, "/data/temp-data-fishing/temp_at_fishing.xlsx"), skip = 4) %>% 
  mutate(Lokal = ifelse(Lokal == "Simpevarp" & Fångstområde == "Ekö", "SI_EK", Lokal),
         Lokal = ifelse(Lokal == "Simpevarp" & Fångstområde == "Hamnefjärden", "SI_HA", Lokal)) %>% 
  mutate(area = recode_factor(Lokal,
                              "Kvädöfjärden" = "JM", 
                              "Muskö" = "MU",
                              "Holmön" = "HO",
                              "Forsmark" = "FM",
                              "Finbo, Åland" = "FB",
                              "Råneå" = "RA",
                              "Torhamn, Karlskrona Ö skärgård" = "TH",
                              "Biotestsjön, Forsmark" = "BT",
                              "Vinö" = "VN")) %>% 
  rename(temp = Värde,
         year = År, 
         date = `Vittjnings\ndatum`) %>% 
  mutate(db = "KUL",
         yday = yday(date),
         month = month(date),
         id = paste(year, area, sep = "_")) %>% 
  mutate(temp = ifelse(area == "SI_HA" & temp == 267, 26.7, temp)) %>% 
  filter(!area == "Simpevarp") %>% 
  filter(temp < 100) %>% 
  filter(Vertikalstyp == "Yta") %>% 
  select(year, yday, date, temp, area, id, db)
#> New names:
#> • `` -> `...8`
#> • `` -> `...10`

# Plot
ggplot(kul_dat, aes(x = year, y = temp)) +
  geom_point(size = 1) +
  guides(color = "none") +
  labs(x = "Year", y = "Temperature") +
  facet_wrap(~area) +
  NULL


# Now we need to add in the rest of the data from Firre database
home <- here::here()
filenames <- list.files(paste0(home,"/data/temp-data-fishing-firre"))
raw_data<-NULL
for(i in 1:length(filenames)) {
  dat <- read_excel(paste0(home,"/data/temp-data-fishing-firre/",filenames[i]))
  if(i == 1) names <- colnames(dat)
  if(i != 1) names(dat) <- names
  area <- substr(filenames[i], 1, 2)
  if(area == "SI") area <- substr(filenames[i], 1, 5)
  #if(area=="JM") area <- paste0(substr(filenames[i],1,2),substr(filenames[i],9,10))
  if(area == "JMT0") area <- substr(filenames[i], 1, 2)
  dat$area <- area
  raw_data <- data.frame(rbind(raw_data, dat))
} # JMT0 is really JM_T10 and taken as the main temperature time series for JM

# Here I get a warning when I convert to data from Year, Week, day-of-week. Could be due to different date standards (week starting with 0 instead of 1, which I think is the ISO way and how our data are coded. It's however not an option in as.Date, which is why i looked into `ISOweek2date`. But that throws an error for some of the dates. Hmm. A simple test however shows that as.Date returns the correct day (double checked). The warnings from as.Date and the error from ISOweek2date could be due to error in data entry. Will explore. 
# https://stackoverflow.com/questions/45549449/transform-year-week-to-date-object
# https://www.r-bloggers.com/2013/08/date-formats-in-r/
firre_dat <- raw_data %>% 
  mutate(group = ifelse(VkDag > 100, "2", "1")) %>% 
  mutate(VkDag2 = ifelse(group == "1", paste0(0, VkDag), VkDag)) %>%  # add zero before strings with length 2...
  separate(VkDag2, sep = 2, into = c("week", 'day'), extra = 'drop', remove = FALSE) %>% 
  mutate(week = as.numeric(week), # to get rid of the 0
         day = as.numeric(day)) %>% 
  mutate(date = as.Date(paste(Årtal, week, day, sep = "-"), "%Y-%U-%u")) %>% 
  # mutate(week2 = paste0("W", week),
  #        weekdate = paste(Årtal, week2, day, sep = "-")) #%>% 
  mutate(#date = ISOweek2date(weekdate),
         yday = yday(date),
         month = month(date)) %>% 
  rename(year = Årtal,
         stn_nr = Station,
         section_nr = Sektion) %>% 
  filter(!if_all(c(MedelFörTemperatur_i, MedelFörTemperatur_u), is.na)) %>% 
  mutate(MedelFörTemperatur_i = ifelse(is.na(MedelFörTemperatur_i), MedelFörTemperatur_u, MedelFörTemperatur_i),
         MedelFörTemperatur_u = ifelse(is.na(MedelFörTemperatur_u), MedelFörTemperatur_i, MedelFörTemperatur_u)) %>% 
  mutate(temp = (MedelFörTemperatur_i + MedelFörTemperatur_u) / 2,
         db = "FIRRE",
         id = paste(year, area, sep = "_")) %>% 
  select(year, yday, month, day, date, VkDag, temp, area, stn_nr, section_nr, db, id) %>% 
  drop_na(date) %>% 
  mutate(date = as.character(date))
#> Warning: There were 24 warnings in `mutate()`.
#> The first warning was:
#> ℹ In argument: `date = as.Date(paste(Årtal, week, day, sep = "-"),
#>   "%Y-%U-%u")`.
#> Caused by warning in `strptime()`:
#> ! (0-based) yday 369 in year 1972 is invalid
#> ℹ Run `dplyr::last_dplyr_warnings()` to see the 23 remaining warnings.

# Now we need to merge it with fish_dat
fish_dat <- bind_rows(firre_dat %>% filter(!id %in% kul_dat$id), # remove duplicates
                      kul_dat) %>% 
  mutate(source = "fishing")

head(fish_dat)
#>   year yday month day       date VkDag  temp area stn_nr section_nr    db
#> 1 1991  239     8   2 1991-08-27   342 15.75   BS      1          1 FIRRE
#> 2 1992  224     8   2 1992-08-11   322 16.65   BS      1          1 FIRRE
#> 3 1992  226     8   4 1992-08-13   324 16.60   BS      1          1 FIRRE
#> 4 1992  228     8   6 1992-08-15   326 15.85   BS      1          1 FIRRE
#> 5 1994  266     9   5 1994-09-23   385 12.30   BS      1          1 FIRRE
#> 6 1991  239     8   2 1991-08-27   342 15.65   BS      2          1 FIRRE
#>        id  source
#> 1 1991_BS fishing
#> 2 1992_BS fishing
#> 3 1992_BS fishing
#> 4 1992_BS fishing
#> 5 1994_BS fishing
#> 6 1991_BS fishing

fish_dat %>% 
  filter(area == "BT") %>% 
  group_by(source) %>% 
  summarise(min_yr = min(year))
#> # A tibble: 1 × 2
#>   source  min_yr
#>   <chr>    <dbl>
#> 1 fishing   1984

# Plot data over year
fish_dat %>%
  ggplot(aes(x = year, y = temp, color = yday)) +
  geom_point(size = 0.25) +
  scale_color_viridis() +
  theme(plot.title = element_text(size = 15, face = "bold")) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  labs(x = "Year", y = "Temperature") +
  facet_wrap(~area) +
  NULL


# Plot data over yearday
fish_dat %>%
  ggplot(aes(x = yday, y = temp, color = factor(year))) +
  geom_point(size = 0.25) +
  scale_color_viridis(discrete = TRUE) +
  theme(plot.title = element_text(size = 15, face = "bold")) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  theme(axis.text.x = element_text(angle = 90)) +
  guides(color = guide_legend(override.aes = list(size = 1))) +
  labs(x = "Day-of-the-year", y = "Temperature") +
  facet_wrap(~area) +
  NULL

ERSST data

# Longitude and latitude attributes for each area
area <- c("BS", "BT", "FB", "FM", "HO", "JM", "MU", "RA", "SI_EK", "SI_HA", "TH", "VN")
nareas <- length(area)
lat <- c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1, 57.5)
lon <- c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9, 16.9)
area_attr <- data.frame(cbind(area = area, lat = lat, lon = lon)) %>%
  mutate_at(c("lat", "lon"), as.numeric)

# Download ERSST data - only run when update needed
# Since the ERSST file is to big for github, you have to run the code below to re-load it and save it in the folder specified in the for loop, or simply use the output of this script which is on github.
# sst_dat_all <- get_ersst_data(years=seq(1940,2022),data.dir=paste0(home,"/data"), latrange=c(55,66),lonrange=c(15,23))

# SST based on ERSST data with relatively low spatial resolution (2x2 degrees)
# Need to cover at least 2x2 grid area with even numbers for longitude/latitude
sst_areas <- NULL
lat_ranges <- lon_ranges <- list() # for testing only
for(a in 1:nareas) {
  lat_range <- c(2*floor(area_attr$lat[a]/2), 2*floor(area_attr$lat[a]/2) + 2)
  lon_range <- c(2*floor(area_attr$lon[a]/2), 2*floor(area_attr$lon[a]/2) + 2)
  sst_area <- load_ersst_data(years = c(1940, 2022), data.dir = paste0(home, "/data"), ncfilename = "sst.mnmean.nc", latrange = lat_range, lonrange = lon_range)
  sst_area$area <- area_attr$area[a]
  sst_areas <- bind_rows(sst_areas, sst_area)
  lat_ranges[[a]] <- lat_range
  lon_ranges[[a]] <- lon_range
}
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.
#> Joining with `by = join_by(lon_index)`
#> Joining with `by = join_by(lat_index)`
#> Joining with `by = join_by(time_index)`
#> Rows: 3944 Columns: 7
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (1): month dbl (5): sst, lon, lat, time, year date (1): date
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> `summarise()` has grouped output by 'year'. You can override using the
#> `.groups` argument.

latranges <- data.frame(matrix(unlist(lat_ranges), ncol = 2, byrow = T))
lonranges <- data.frame(matrix(unlist(lon_ranges), ncol = 2, byrow = T))

# Plot SST by area in each month
sst_areas %>%
  ggplot(aes(x = year, y = meanSST, group = as.factor(month), color = as.factor(month))) +
  geom_line() +
  scale_color_viridis(discrete = TRUE) +
  scale_x_continuous(breaks = seq(1940, 2020, 10)) +
  theme(plot.title = element_text(size = 15, face = "bold")) +
  theme(axis.text.x = element_text(angle= 90)) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  labs(x = "Year", y = "Mean SST", title = "Mean SST in each month by area", color = "Month") +
  facet_wrap(~area, scale = "free_y") +
  NULL


sst_areas <- sst_areas %>% 
  mutate(date = paste(year, month, 15, sep = "-"),
         yday = yday(date),
         source = "errs",
         month = as.numeric(month)) %>% 
  rename(temp = meanSST)

errs_dat <- sst_areas

# Plot SST by month in each area
errs_dat %>%
  ggplot(aes(x = year, y = temp, group = as.factor(area), color = as.factor(area))) +
  geom_line() +
  scale_color_brewer(palette = "Set3") +
  scale_x_continuous(breaks = seq(1940, 2020, 10)) +
  theme(plot.title = element_text(size = 15, face = "bold")) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  labs(x = "Year", y = "Mean SST", title = "Mean SST in each month by area", color = "Area") +
  facet_wrap(~month, scale = "free_y") +
  NULL

all_temp <- bind_rows(log_dat, errs_dat, fish_dat)

# See if we can compare the mean temperature in a time of the year that have overlap
all_temp %>% 
  filter(area == "FB") %>% 
  ggplot(aes(yday, fill = source)) + 
  facet_wrap(~year, scales = "free_y") + 
  geom_density(alpha = 0.5, color = NA)


# Plot some overlapping year and areas
all_temp %>% 
  filter(area == "VN" &  year > 1995) %>% 
  ggplot(aes(yday, temp, color = source)) +
  geom_point() + 
  facet_wrap(~year)


all_temp %>% 
  filter(area == "JM" & year < 1995 & year > 1975) %>% 
  ggplot(aes(yday, temp, color = source)) +
  geom_point() + 
  facet_wrap(~year)


all_temp %>% 
  filter(area == "FM" & year < 2000 & year > 1980) %>% 
  ggplot(aes(yday, temp, color = source)) +
  geom_point() + 
  facet_wrap(~year)


# Some examples: OK match but not in BT which makes sense because the ERSS data isn't fine scale enough
all_temp %>% 
  filter(area == "FB" & year %in% c(1998:2001)) %>% 
  ggplot(aes(yday, temp, color = source)) +
  geom_point() + 
  geom_line() + 
  facet_wrap(~year)


all_temp %>% 
  filter(area == "SI_EK" & year %in% c(1998:2001)) %>% 
  ggplot(aes(yday, temp, color = source)) +
  geom_point() + 
  geom_line() + 
  facet_wrap(~year)


all_temp %>% 
  filter(area == "BT" & year %in% c(1998:2001)) %>% 
  ggplot(aes(yday, temp, color = source)) +
  geom_point() + 
  geom_line() + 
  facet_wrap(~year)


# Take a close look at SI_EK 1995
all_temp %>% 
  filter(source == "logger" & area == "SI_EK" & year == 1996) %>% 
  ggplot(aes(yday, temp)) +
  geom_point() + 
  facet_grid(area~year)


# Filter the row of very low values - broken logger?
all_temp %>% 
  filter(source == "logger" & area == "SI_EK" & year == 1996) %>% 
  mutate(keep = ifelse(temp < 6.54, "N", "Y")) %>% 
  ggplot(aes(yday, temp, color = keep)) +
  geom_point() + 
  facet_grid(area~year)


all_temp <- all_temp %>%
  mutate(keep = ifelse(source == "logger" & area == "SI_EK" & year == 1996 & temp < 6.54, "N", "Y")) %>%
  filter(keep == "Y") %>%
  select(-keep)

Save data!

# Save data for model fitting
write_csv(all_temp, paste0(home, "/data/for-analysis/temp_data_for_fitting.csv"))